
# Project path configuration; this file reads PATHS.dropbox and PATHS.figures.
from init import PATHS
import logging
LOGGER = logging.getLogger(__name__)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# NOTE(review): the 'seaborn' style name was deprecated in matplotlib 3.6 in
# favour of 'seaborn-v0_8' -- confirm the matplotlib version in use accepts it.
matplotlib.style.use('seaborn')
import seaborn as sns
# Applied after the matplotlib style above, so seaborn's "white" theme is set last.
sns.set(style="white")
# Palette that the plotting functions in this file index into for line colours.
pal = sns.color_palette('colorblind')
from C_PatentVariables import conventions_names_colors
# NOTE(review): these three names are not referenced by the functions visible
# in this file; the call still runs at import time.
dict_subsector_shortnames, dict_var_colors, list_of_subsectors_in_cleancars = conventions_names_colors.main()


def main():
    """Produce both cross-sectoral coverage figures, logging start and end."""
    LOGGER.info('Begin c_othergraphs_crosssectoral.py')
    # Same order as before: BvD coverage first, then the PSN decomposition.
    for make_figure in (
        plot_bat_fc_patenting_bysector_bvdcoverage,
        plot_bat_fc_patenting_bysector_psncoverage,
    ):
        make_figure()
    LOGGER.info('END c_othergraphs_crosssectoral.py')


def plot_bat_fc_patenting_bysector_bvdcoverage():
    """Plot yearly battery / fuel-cell family counts split by sector flag.

    Reads ``Families_Bat_FC_naics_info.csv``, keeps rows whose earliest filing
    year lies in 1990-2015 and, for each of the two exclusive sub-sectors
    ('batteries', 'fuel cells'), draws one line per sector flag column plus two
    black lines: rows without a BvD id, and rows with a BvD id but no 4-digit
    NAICS count and ``OEMorSubsidiary == 0``. Every point is the number of
    distinct ``docdb_family_id``/``bvdid`` combinations for that year.

    The two-panel figure is saved as PDF and PNG under
    ``PATHS.figures / 'cross_sectoral_spillovers'``. Returns nothing.
    """
    families = pd.read_csv(PATHS.dropbox / 'Data_outputted/C_PatentVariables/Families_Bat_FC_naics_info.csv')
    # One key per family/applicant pair; a missing BvD id contributes ''.
    families['docdb_bvd'] = families['docdb_family_id'].astype(str) + '_' + families['bvdid'].fillna('')

    def has_positive(column):
        """Select rows whose *column* value is present and strictly positive."""
        values = families[column]
        return values.notnull() & (values > 0)

    in_window = families['earliest_filing_year'].isin(range(1990, 2016))
    subsector = families['Sub-sector_exclusive']

    # (row selector, ax.plot keyword arguments) in drawing and legend order.
    curves = [
        (has_positive('MotorVehicle') | has_positive('OEMorSubsidiary'),
         {'label': 'Motor Vehicle, OEMs and Subsidiaries', 'color': pal[3], 'linewidth': 3}),
        (has_positive('Electronics'),
         {'label': 'Electronics', 'color': pal[0], 'linewidth': 3}),
        (has_positive('MachineryChemical'),
         {'label': 'Machinery and Chemical Manufacturing', 'color': pal[-6], 'linewidth': 2}),
        (has_positive('OtherManufacturing'),
         {'label': 'Other Manufacturing', 'color': pal[-4], 'linewidth': 2}),
        (has_positive('EducationRD'),
         {'label': 'Education and R&D', 'color': pal[2], 'linewidth': 2}),
        (has_positive('OtherTransport'),
         {'label': 'Other Transport', 'color': pal[1], 'linewidth': 2}),
        (has_positive('Utilities'),
         {'label': 'Utilities', 'color': pal[-5], 'linewidth': 2}),
        (has_positive('OtherSector'),
         {'label': 'Other Sector', 'color': pal[-3], 'linewidth': 2, 'linestyle': '-'}),
        (families['bvdid'].isnull(),
         {'label': 'No BvD id', 'color': 'black', 'linewidth': 2, 'linestyle': ':'}),
        (families['bvdid'].notnull()
         & families['Nbr_NAICS_4digit'].isnull()
         & (families['OEMorSubsidiary'] == 0),
         {'label': 'No NAICS code', 'color': 'black', 'linewidth': 2, 'linestyle': '--'}),
    ]

    fig, panels = plt.subplots(1, 2, figsize=(15, 6))
    for panel, subsector_name in zip(panels, ('batteries', 'fuel cells')):
        in_subsector = subsector == subsector_name
        for selector, plot_kwargs in curves:
            selected = families[in_subsector & in_window & selector]
            yearly_counts = selected.groupby('earliest_filing_year')['docdb_bvd'].nunique()
            panel.plot(yearly_counts, **plot_kwargs)
        panel.set_xlabel('Year')
        panel.set_ylabel('Number of DocDB Families')

    # Leave room at the bottom for the shared legend (taken from the first panel).
    fig.subplots_adjust(bottom=0.23)
    handles, labels = panels[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=3, fontsize=8, fancybox=True, frameon=True,
               edgecolor='black', framealpha=.5, borderpad=.7)
    panels[0].set_title('Battery patents')
    panels[1].set_title('Fuel cell patents')

    output_dir = PATHS.figures / 'cross_sectoral_spillovers'
    for file_name in ('BatFC_Patenting_bysector_bvdcoverage.pdf',
                      'BatFC_Patenting_bysector_bvdcoverage.png'):
        plt.savefig(output_dir / file_name, bbox_inches='tight')
    plt.close()



def plot_bat_fc_patenting_bysector_psncoverage():
    """Plot yearly battery / fuel-cell family counts for rows lacking a BvD id,
    decomposed by PSN sector flag.

    Reads ``Families_Bat_FC_naics_info.csv`` and left-merges the PSN sector
    columns of ``Families_overlap.csv`` on ``docdb_family_id``. Rows are
    restricted to earliest filing years 1990-2015. For each exclusive
    sub-sector ('batteries', 'fuel cells') the panel shows:

    * rows without a BvD id,
    * rows with a BvD id but no 4-digit NAICS count and ``OEMorSubsidiary == 0``,
    * rows without a BvD id that carry each PSN sector flag (COMPANY, GOV,
      INDIVIDUAL, UNIVERSITY, UNKNOWN, the union GOV/HOSPITAL/NON-PROFIT, and
      ``psn_sector_missing``).

    Every point is the number of distinct ``docdb_family_id``/``bvdid``
    combinations for that year. The figure is saved as PDF and PNG under
    ``PATHS.figures / 'cross_sectoral_spillovers'``. Returns nothing.
    """
    families = pd.read_csv(PATHS.dropbox / 'Data_outputted/C_PatentVariables/Families_Bat_FC_naics_info.csv')
    # One key per family/applicant pair; a missing BvD id contributes ''.
    families['docdb_bvd'] = families['docdb_family_id'].astype(str) + '_' + families['bvdid'].fillna('')

    psn_columns = ['COMPANY', 'GOV', 'HOSPITAL', 'INDIVIDUAL', 'NON-PROFIT', 'UNIVERSITY', 'UNKNOWN',
                   'psn_sector_missing']
    overlap = pd.read_csv(PATHS.dropbox / 'Data_outputted/C_PatentVariables/Families_overlap.csv')
    overlap = overlap[['docdb_family_id'] + psn_columns]
    families = families.merge(overlap, on='docdb_family_id', how='left')

    def flagged(column):
        """Return an explicit boolean selector for a PSN sector column.

        The left merge leaves NaN for families absent from the overlap file,
        and ``&`` / ``|`` between a boolean mask and a numeric column is
        evaluated bitwise (or fails on floats). Treating "present and > 0" as
        the flag makes the combination a pure boolean operation; for columns
        that already hold True/False or 0/1 the selection is unchanged.
        """
        values = families[column]
        return values.notnull() & (values > 0)

    in_window = families['earliest_filing_year'].isin(range(1990, 2016))
    subsector = families['Sub-sector_exclusive']
    no_bvd = families['bvdid'].isnull()
    no_naics = (
        families['bvdid'].notnull()
        & families['Nbr_NAICS_4digit'].isnull()
        & (families['OEMorSubsidiary'] == 0)
    )

    # (row selector, ax.plot keyword arguments) in drawing and legend order.
    curves = [
        (no_bvd,
         {'label': 'No BvD id', 'color': 'black', 'linewidth': 2, 'linestyle': ':'}),
        (no_naics,
         {'label': 'No NAICS code', 'color': 'black', 'linewidth': 2, 'linestyle': '--'}),
        (no_bvd & flagged('COMPANY'),
         {'label': 'PSN Sector = COMPANY', 'color': pal[0], 'linewidth': 2, 'linestyle': '-'}),
        (no_bvd & flagged('GOV'),
         {'label': 'PSN Sector = GOV', 'color': pal[1], 'linewidth': 2, 'linestyle': '-'}),
        (no_bvd & flagged('INDIVIDUAL'),
         {'label': 'PSN Sector = INDIVIDUAL', 'color': pal[2], 'linewidth': 2, 'linestyle': '-'}),
        (no_bvd & flagged('UNIVERSITY'),
         {'label': 'PSN Sector = UNIVERSITY', 'color': pal[3], 'linewidth': 2, 'linestyle': '-'}),
        (no_bvd & flagged('UNKNOWN'),
         {'label': 'PSN Sector = UNKNOWN', 'color': pal[4], 'linewidth': 2, 'linestyle': '-'}),
        (no_bvd & (flagged('GOV') | flagged('HOSPITAL') | flagged('NON-PROFIT')),
         {'label': 'PSN Sector = Gov, Non-Profit, Hospital', 'color': pal[5], 'linewidth': 2,
          'linestyle': '-'}),
        (no_bvd & flagged('psn_sector_missing'),
         {'label': 'PSN Sector Missing', 'color': pal[6], 'linewidth': 2, 'linestyle': '-'}),
    ]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    for ax, subsector_name in zip((ax1, ax2), ('batteries', 'fuel cells')):
        in_subsector = subsector == subsector_name
        for selector, plot_kwargs in curves:
            selected = families[in_subsector & in_window & selector]
            yearly_counts = selected.groupby('earliest_filing_year')['docdb_bvd'].nunique()
            ax.plot(yearly_counts, **plot_kwargs)
        ax.set_xlabel('Year')
        ax.set_ylabel('Number of DocDB Families')

    # Leave room at the bottom for the shared legend (taken from the first panel).
    plt.subplots_adjust(bottom=0.23)
    handles, labels = ax1.get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=3, fontsize=8, fancybox=True, frameon=True,
               edgecolor='black', framealpha=.5, borderpad=.7)
    ax1.set_title('Battery patents')
    ax2.set_title('Fuel cell patents')

    output_dir = PATHS.figures / 'cross_sectoral_spillovers'
    for file_name in ('BatFC_Patenting_bysector_PSNSector_decomposition_ofthosewithnobvdids.pdf',
                      'BatFC_Patenting_bysector_PSNSector_decomposition_ofthosewithnobvdids.png'):
        plt.savefig(output_dir / file_name, bbox_inches='tight')
    plt.close()

